set up

In this dataset, I will be analyzing the traffic stops in Wichita, KS.

library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1     v purrr   0.3.2
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ---------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
stops <- read_csv("https://datajournalism.tech/wp-content/uploads/2019/10/wichita.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   X1 = col_double(),
##   date = col_date(format = ""),
##   time = col_time(format = ""),
##   lat = col_double(),
##   lng = col_double(),
##   subject_age = col_double(),
##   citation_issued = col_logical(),
##   posted_speed = col_double(),
##   vehicle_year = col_double()
## )
## See spec(...) for full column specifications.
population_2016 <- tibble(subject_race = c("asian/pacific islander", "black", "hispanic", "other/unknown", "white"), num_people = c(19294, 42485, 65090, 16686, 245499)) %>% mutate(subject_race = as.factor(subject_race))

center_lat <- 37.685260
center_lng <- -97.336411

exploratory data analysis

You can also embed plots, for example:

##  [1] "X1"                      "raw_row_number"         
##  [3] "date"                    "time"                   
##  [5] "location"                "lat"                    
##  [7] "lng"                     "subject_age"            
##  [9] "subject_race"            "subject_sex"            
## [11] "type"                    "disposition"            
## [13] "violation"               "citation_issued"        
## [15] "outcome"                 "posted_speed"           
## [17] "vehicle_color"           "vehicle_make"           
## [19] "vehicle_model"           "vehicle_year"           
## [21] "raw_defendant_race"      "raw_defendant_ethnicity"
## [1] 57750
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 57750 obs. of  22 variables:
##  $ X1                     : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ raw_row_number         : chr  "923578" "923657" "912091" "923680" ...
##  $ date                   : Date, format: "2016-01-01" "2016-01-01" ...
##  $ time                   : 'hms' num  18:00:00 18:08:00 18:11:00 18:13:00 ...
##   ..- attr(*, "units")= chr "secs"
##  $ location               : chr  "N WEST ST, KS, 67205" "8000 W 13TH ST N, WICHITA, KS, 67212" "500 S LIMUEL ST, WICHITA, KS, 67235" "7600 W 21ST ST N, WICHITA, KS, 67205" ...
##  $ lat                    : num  37.7 37.7 37.7 37.7 37.7 ...
##  $ lng                    : num  -97.4 -97.4 -97.5 -97.4 -97.4 ...
##  $ subject_age            : num  16 44 20 21 28 27 15 20 23 NA ...
##  $ subject_race           : chr  "white" "white" "white" "hispanic" ...
##  $ subject_sex            : chr  "female" "male" "male" "female" ...
##  $ type                   : chr  "vehicular" "vehicular" "vehicular" "vehicular" ...
##  $ disposition            : chr  "DISMISSED" "GUILTY (IVR)" "DISMISSED WITH PREJUDICE; DISMISSED WITH PREJUDICE" "GUILTY" ...
##  $ violation              : chr  "RUN STOP SIGN" "SPEED OVER LIMIT" "DUI; INATTENTIVE DRIVING" "SPEED OVER LIMIT" ...
##  $ citation_issued        : logi  TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ outcome                : chr  "citation" "citation" "citation" "citation" ...
##  $ posted_speed           : num  NA 40 NA 40 40 40 NA NA NA NA ...
##  $ vehicle_color          : chr  "BURGUNDY OR MAROON" "\"ALUMINUM, SILVER\"" "WHITE" "\"ALUMINUM, SILVER\"" ...
##  $ vehicle_make           : chr  "JEEP (1989 TO PRESENT)" "HYUNDAI" "HONDA" "TOYOTA" ...
##  $ vehicle_model          : chr  NA "TUCSON" NA NA ...
##  $ vehicle_year           : num  2008 NA NA NA NA ...
##  $ raw_defendant_race     : chr  "W" "W" "W" "W" ...
##  $ raw_defendant_ethnicity: chr  "N" "N" "N" "H" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   X1 = col_double(),
##   ..   raw_row_number = col_character(),
##   ..   date = col_date(format = ""),
##   ..   time = col_time(format = ""),
##   ..   location = col_character(),
##   ..   lat = col_double(),
##   ..   lng = col_double(),
##   ..   subject_age = col_double(),
##   ..   subject_race = col_character(),
##   ..   subject_sex = col_character(),
##   ..   type = col_character(),
##   ..   disposition = col_character(),
##   ..   violation = col_character(),
##   ..   citation_issued = col_logical(),
##   ..   outcome = col_character(),
##   ..   posted_speed = col_double(),
##   ..   vehicle_color = col_character(),
##   ..   vehicle_make = col_character(),
##   ..   vehicle_model = col_character(),
##   ..   vehicle_year = col_double(),
##   ..   raw_defendant_race = col_character(),
##   ..   raw_defendant_ethnicity = col_character()
##   .. )
##        X1        raw_row_number          date                time         
##  Min.   :    1   Length:57750       Min.   :2016-01-01   Length:57750     
##  1st Qu.:14438   Class :character   1st Qu.:2016-03-16   Class1:hms       
##  Median :28876   Mode  :character   Median :2016-05-29   Class2:difftime  
##  Mean   :28876                      Mean   :2016-06-10   Mode  :numeric   
##  3rd Qu.:43313                      3rd Qu.:2016-08-31                    
##  Max.   :57750                      Max.   :2016-12-31                    
##                                                                           
##    location              lat             lng           subject_age   
##  Length:57750       Min.   :37.47   Min.   :-101.36   Min.   :11.00  
##  Class :character   1st Qu.:37.67   1st Qu.: -97.37   1st Qu.:24.00  
##  Mode  :character   Median :37.69   Median : -97.34   Median :33.00  
##                     Mean   :37.69   Mean   : -97.33   Mean   :36.71  
##                     3rd Qu.:37.70   3rd Qu.: -97.28   3rd Qu.:48.00  
##                     Max.   :38.48   Max.   : -96.75   Max.   :99.00  
##                     NA's   :1167    NA's   :1167      NA's   :10128  
##  subject_race       subject_sex            type          
##  Length:57750       Length:57750       Length:57750      
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##                                                          
##  disposition         violation         citation_issued   outcome         
##  Length:57750       Length:57750       Mode:logical    Length:57750      
##  Class :character   Class :character   TRUE:57750      Class :character  
##  Mode  :character   Mode  :character                   Mode  :character  
##                                                                          
##                                                                          
##                                                                          
##                                                                          
##   posted_speed    vehicle_color      vehicle_make       vehicle_model     
##  Min.   : 20.00   Length:57750       Length:57750       Length:57750      
##  1st Qu.: 30.00   Class :character   Class :character   Class :character  
##  Median : 40.00   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 39.93                                                           
##  3rd Qu.: 40.00                                                           
##  Max.   :304.00                                                           
##  NA's   :35149                                                            
##   vehicle_year   raw_defendant_race raw_defendant_ethnicity
##  Min.   :1962    Length:57750       Length:57750           
##  1st Qu.:2001    Class :character   Class :character       
##  Median :2005    Mode  :character   Mode  :character       
##  Mean   :2005                                              
##  3rd Qu.:2009                                              
##  Max.   :2999                                              
##  NA's   :43236
## # A tibble: 5 x 3
##   subject_race               n   prop
##   <chr>                  <int>  <dbl>
## 1 asian/pacific islander  1607 0.0278
## 2 black                   8038 0.139 
## 3 hispanic                6709 0.116 
## 4 other/unknown           9335 0.162 
## 5 white                  32061 0.555

To conduct a benchmark test, I will compute the proportions of demographic data in Wichita, KS.

population_2016 <- population_2016 %>% 
  mutate(prop = num_people / sum(num_people))

Next step I will combine the two tables of population_2016 and race_group to conduct a benchmark test.

stops_final <- race_group %>% 
  left_join(population_2016, by = "subject_race") %>% 
  mutate(stop_rate = n / num_people)
## Warning: Column `subject_race` joining character vector and factor,
## coercing into character vector

Data Visualization

I will visualize that stop rate by the police in Wichita, KS using a bar chart.

bar <- ggplot(stops_final, aes(x=reorder(subject_race,stop_rate), y = stop_rate))+
  geom_bar(stat="identity", position="identity", fill="pink")+
  labs(title="Stopped Drivers by Race", subtitle = "African American drivers got stopped more than White drivers in the city of Wichita, Kansas")+
  coord_flip()+
geom_label(aes(x= subject_race, y=stop_rate, label= round(stop_rate,2)))
bar

Here is an interactive map. click on the dots to see more details about the drivers.

library(httpuv)
library(leaflet)
race <- colorFactor(c("pink", "blue", "white", "burlywood3", "green"),
          domain = c("asian/pacific islander", "black", "white", "hispanic", "other/unknown"), ordered = TRUE)
f <- stops %>% drop_na(lat, lng)
map <- leaflet(f) %>% 
  addProviderTiles(providers$CartoDB) %>% 
  setView(lng = center_lng, lat = center_lat, zoom = 16) %>% 
  addCircleMarkers(~lng,
                   ~lat,
                   popup=paste("Race:",  f$subject_race, "Sex:", f$subject_sex, "Age:", f$subject_age),
                   weight = 3,
                   radius = 4,
                   color=~race(subject_race),
                   stroke=F,
                   fillOpacity =1)
map